
* ---- session setup ----
* Reset matrices and data, close any open log (capture: no error if none is
* open), and turn off output paging so the script can run unattended.
clear matrix
cap log close
set more off
set matsize 10000
clear

* Intermediate datasets are saved under temp/ throughout this script; create
* the directory if needed (capture: no error if it already exists).
capture mkdir temp



* THIS FILE PREPARES THE ANALYTICAL DATASET USING THE VARIOUS SOURCE FILES DESCRIBED IN THE README FILE






***************************************************************************************************

* insheet source files

* Files sorted on file_name whose Stata copies are written to temp/.
foreach src in GS GS_alt merged_1315_final merged_1315_alt_final {
    insheet using `src'.txt, clear
    sort file_name
    save temp/`src', replace
}

* Examiner roster: the imported column uexaminer_name is renamed to
* Uexaminer_name, the spelling used later in this script.
insheet using examiners_first_year.txt, clear
rename uexaminer_name Uexaminer_name
sort file_name
save examiners_first_year, replace

* Files converted to Stata format without sorting.
foreach src in applications rejections transaction_history {
    insheet using `src'.txt, clear
    save `src', replace
}

* Remaining files. Each entry is "<file stem> <sort key(s)>".
* class_match is processed last so it is the dataset left in memory.
foreach spec in "nonpatentpriorart patent_number" "productivity class" "filings year class" "entity_size file_name" "litigation patent_number" "class_match class" {
    gettoken src keys : spec
    insheet using `src'.txt, clear
    sort `keys'
    save `src', replace
}







*****************************************************************************************************


* Generate temp data set indicating the date of the first office action for each file_name--i.e., for each application

*****************************************************************************************************

* Build temp/foa_date: one row per file_name holding the earliest
* first-office-action (FOA) date among the qualifying transaction codes.
use transaction_history

* Keep only the transaction codes treated as first office actions.
keep if inlist(transaction_code, 41, 42, 32, 34, 75, 152, 185, 799, 54, 55)

* Drop events dated before 2001; rows with a missing year are retained.
drop if year < 2001 & year != .

* Year of the event as text; saved downstream under the name foa_year_base.
gen foa_year_base = string(year)

* Daily date built directly from the numeric month/day/year columns.
* mdy() returns missing when any component is missing or invalid, which
* replaces the earlier zero-padding and string parsing.
gen foa_date = mdy(month, day, year)

* Earliest event per application (missing dates sort last).
sort file_name foa_date
by file_name: keep if _n == 1

keep file_name foa_date foa_year_base
sort file_name
save temp/foa_date, replace

*******************************************************************************************************************

*******************************************************************************************************************




***********************************************************************

* GENERATE FLAG FOR NON-FINAL REJECTION

**********************************************************************

* Build temp/non_final_rejection: one row per file_name with a 0/1 flag that
* is 1 when the application has at least one transaction with code 42.
use transaction_history

gen byte is_code42 = (transaction_code == 42)
collapse (sum) n_code42 = is_code42, by(file_name)

gen non_final_rejection = (n_code42 >= 1)
drop n_code42

sort file_name
save temp/non_final_rejection, replace


**************************************************************************









******************************************************************************************************************

* SETUP PATENT FAMILY DATA


******************************************************************************************************************

* Build temp/family1: family_id -> patent_number for the rows of the USPTO
* family file whose third character of uspto_nbr is "0" (the rows this script
* treats as issued patents).
insheet using 201502_TPF_USPTO.txt, delim("|") clear

keep if substr(uspto_nbr, 3, 1) == "0"

* The patent number is everything from the fourth character on, trimmed.
gen patent_number = trim(substr(uspto_nbr, 4, 100))

keep patent_number family_id
sort family_id
save temp/family1, replace


* Build temp/family2: per family_id, flags for an EPO grant, a JPO grant, and
* foreign priority (earliest filing equals the EPO or JPO first filing).
insheet using 201502_TPF_Core.txt, delim("|") clear

gen grant_epo = (epo_grant != .)
gen grant_jpo = (jpo_grant != .)

* Earliest of the three first-application values in the row.
egen earliest_app = rowmin(epo_app_first uspto_app_first jpo_app_first)

* NOTE(review): a tie with the USPTO date, or a row where all three values are
* missing, also satisfies this equality -- confirm that is intended.
gen foreign_priority = (earliest_app == epo_app_first) | (earliest_app == jpo_app_first)

keep family_id grant_epo grant_jpo foreign_priority
sort family_id
save temp/family2, replace



* Build temp/family3: per issued patent, the family-level grant and priority
* flags plus family_size (number of patent rows sharing the family_id).
use temp/family1, clear

* Old-style merge syntax: both files are already sorted on family_id.
merge family_id using temp/family2
drop _merge

* Rows that came only from family2 carry no patent number; discard them.
keep if patent_number != ""

* Group size counted after the filter above.
bysort family_id: gen family_size = _N

keep patent_number grant_epo grant_jpo family_size foreign_priority

destring patent_number, replace
sort patent_number

save temp/family3, replace




* Build temp/family4: family_id -> app_no for the rows of the USPTO family
* file that are NOT flagged as issued patents (third character of uspto_nbr
* is not "0").
clear
insheet using 201502_TPF_USPTO.txt, delim("|")

gen flag = substr(uspto_nbr, 3,1)

gen issue = 0
replace issue = 1 if flag == "0"

* app_no is assembled from two pieces of uspto_nbr: characters 3-6 and
* everything from character 7 on (trimmed).
gen app_no2 = substr(uspto_nbr, 7, 100) if issue == 0
* Trim app_no2 by its full name. The earlier trim(app_no) only resolved
* because Stata abbreviated it to app_no2; it would break with varabbrev off
* or if another app_no* variable existed at this point.
replace app_no2 = trim(app_no2)

gen app_no1 = substr(uspto_nbr, 3, 4) if issue == 0

gen app_no = app_no1 + app_no2

keep if issue == 0
keep  family_id app_no
sort family_id
save temp/family4, replace

* Build temp/family5: per family_id, the foreign-priority flag only
* (same definition as used for temp/family2).
insheet using 201502_TPF_Core.txt, delim("|") clear

egen earliest_app = rowmin(epo_app_first uspto_app_first jpo_app_first)

gen foreign_priority = (earliest_app == epo_app_first) | (earliest_app == jpo_app_first)

keep family_id foreign_priority
sort family_id
save temp/family5, replace



* Build temp/family6: app_no -> foreign_priority2, the application-level
* counterpart of the patent-level foreign_priority flag.
use temp/family4, clear

* Old-style merge syntax: both files are already sorted on family_id.
merge family_id using temp/family5
drop _merge

* Rows that came only from family5 have no application number.
keep if app_no != ""

keep app_no foreign_priority
rename foreign_priority foreign_priority2

sort app_no
save temp/family6, replace




********************************************************************************************************************


*******************************************************************************************************************






********************************************************************************************************

* create flag to determine if application was re-docketed to another examiner during the prosecution.  see Appendix of paper
* for discussion of robustness checks addressing such scenarios

********************************************************************************************************



* Build temp/flag_redocket2: one row per file_name with
*   docketflag     - share of docketing events dated more than 30 days after
*                    the first office action
*   flag_redocket2 - 1 if any such event exists, else 0
use transaction_history

* Keep only the transaction codes treated as docketing events.
keep if inlist(transaction_code, 7, 458, 840)

* Daily date of the docketing event, built directly from the numeric
* month/day/year columns (missing if any component is missing or invalid).
gen date = mdy(month, day, year)

* Attach each application's first-office-action date.
sort file_name
merge m:1 file_name using temp/foa_date
drop _merge

keep if file_name > 9800000

* Days between the docketing event and the first office action.
gen distance = date - foa_date

gen docketflag = (distance > 30 & distance != .)

collapse (mean) docketflag, by(file_name)

gen flag_redocket2 = (docketflag > 0 & docketflag != .)

sort file_name
save temp/flag_redocket2, replace




*************************************************************************************************

*************************************************************************************************






***************************************************************************************************

* create flag indicating whether or not the application had an RCE

*************************************************************************************************


* Build temp/rce_flag: per file_name, num_rce (count of transactions with
* code 68) and rce (1 if num_rce is at least 1).
use transaction_history

gen byte is_code68 = (transaction_code == 68)
collapse (sum) num_rce = is_code68, by(file_name)

gen rce = (num_rce >= 1)

sort file_name
save temp/rce_flag, replace



***************************************************************************************************************************


**************************************************************************************************************************














*****************************************************************


* SET UP EXAMINER CITATIONS


****************************************************************


* temp/excite: per patent, the number of citation rows with excite == 1.
* NOTE(review): ignore("RE") strips the letters R and E before converting, so
* a number such as RE12345 becomes 12345 -- confirm this cannot collide with
* an ordinary patent number.
insheet using excite.txt, clear
keep if excite == 1
destring patent, replace ignore("RE")
rename patent patent_number

gen one = 1
collapse (sum) num_excites = one, by(patent_number)

sort patent_number
save temp/excite, replace


* temp/total_cite: per patent, the number of citation rows of any kind.
insheet using excite.txt, clear
destring patent, replace ignore("RE")
rename patent patent_number

gen one = 1
collapse (sum) total_cites = one, by(patent_number)

sort patent_number
save temp/total_cite, replace




******************************************************************


**********************************************************************************************************











*****************************************************************

* SET UP REJECTION DATA

*******************************************************************************************


* For each rejection basis, build temp/<basis> with one row per file_name:
*   num_<basis> - number of rejection rows whose code is in the basis's list
*   <basis>     - 1 if num_<basis> is at least 1, else 0
* Note that 35USC102and103 counts toward both obv and novelty.
local codes_obv     35USC102and103 35USC103(a) 35USC103unspecified
local codes_novelty 35USC102(a) 35USC102(b) 35USC102(d) 35USC102(e) 35USC102(f) 35USC102(g) 35USC102(h) 35USC102(n) 35USC102and103 35USC102unspecified
local codes_psm     35USC101
local codes_wd      35USC112paragraph1 35USC112paragraph2 35USC112paragraph4 35USC112paragraph5 35USC112paragraph6 35USC112unspecified

foreach basis in obv novelty psm wd {
    use rejections, clear

    gen `basis' = 0
    foreach code of local codes_`basis' {
        replace `basis' = 1 if rejection == "`code'"
    }

    collapse (sum) `basis', by(file_name)

    gen num_`basis' = `basis'
    replace `basis' = 1 if `basis' >= 1 & `basis' != .

    sort file_name
    save temp/`basis', replace
}

clear



************************************************************************

************************************************************************




***********************************************************************
********************************************************************
**********************************************************************

* begin to set up analytical file, starting with applications data

**********************************************************************


use applications

* Sample: utility applications only.
keep if application_type == "Utility"
destring issue_date_of_patent_year, replace ignore("-")

* Disposition year: the issue year when a patent number is present,
* otherwise the status year.
gen year = cond(patent_number != "-", issue_date_of_patent_year, status_year)

capture drop _merge

drop if year <= 2001


* Grade levels (matched by file_name); grade 5 rows are dropped.
sort file_name
merge 1:1 file_name using temp/GS
capture drop _merge

drop if grade == 5

tab grade, gen(GSdum)


* Keep the name from the applications file under a separate name before
* merging in the examiner file.
rename examiner_name examiner_name_original

* Examiner first years.
sort file_name
merge 1:1 file_name using examiners_first_year
capture drop _merge

* Experience in years, with negative values clamped to zero (missing stays
* missing).
gen experience = cond(year - first_year2 < 0, 0, year - first_year2)
* No rows can be negative after the clamp; kept to mirror the original steps.
drop if experience < 0


* Class: first three characters of class_subclass.
gen class = substr(class_subclass, 1, 3)


sort file_name
merge 1:1 file_name using entity_size
capture drop _merge

* 1 = LARGE, 0 = SMALL, missing otherwise.
gen large = cond(entity_size == "LARGE", 1, cond(entity_size == "SMALL", 0, .))


keep if year >= 2002

* Grant indicator derived from the status text:
*   0 - the abandonment statuses listed first
*   1 - allowed/patented statuses (including abandonment for failure to pay
*       the issue fee), or any row carrying a patent number
* Rows with any other status are left missing and dropped.
gen grant = .

replace grant = 0 if ///
    status == "Abandoned -- After Examiner's Answer or Board of Appeals Decision" | ///
    status == "Abandoned -- Failure to Respond to an Office Action" | ///
    status == "Abandoned -- Incomplete Application (Pre-examination)" | ///
    status == "Abandoned -- File-Wrapper-Continuation Parent Application" | ///
    status == "Abandoned -- Incomplete (Filing Date Under Rule 53 (b) - PreExam)" | ///
    status == "Abandoned -- Pre-PALM Application Added for File-Tracking Purposes" | ///
    status == "Expressly Abandoned -- During Examination"

replace grant = 1 if ///
    status == "Abandoned -- Failure to Pay Issue Fee" | ///
    status == "Allowed -- Notice of Allowance Not Yet Mailed" | ///
    status == "Notice of Allowance Mailed -- Application Received in Office of Publications" | ///
    status == "Patented Case" | ///
    patent_number != "-"

keep if inlist(grant, 0, 1)


* Filing-date components arrive as text with "-" placeholders; convert them.
destring filing_or_371_c_year, ignore("-") replace
destring filing_or_371_c_month, ignore("-") replace
destring filing_or_371_c_day, ignore("-") replace


* Duration in days from filing to the status date, as a difference of two
* Stata daily dates. The previous formula (year*365 + month*12 + day) weighted
* each month as 12 days and so was not a day count.
* NOTE: duration is recomputed later in this file; keep both definitions
* consistent.
gen duration = mdy(status_month, status_day, status_year) - mdy(filing_or_371_c_month, filing_or_371_c_day, filing_or_371_c_year)


* Year and class indicator sets (created before the restrictions below, so
* they span every value present at this point).
tab year, gen(yeardum)
tab class, gen(classdum)


* Remove the listed D-prefixed classes and PLT.
foreach excluded_class in D02 D08 D18 D20 D21 D22 D24 D30 PLT {
    drop if class == "`excluded_class'"
}


* Disposition years 2002-2012 only (missing years are dropped here).
keep if inrange(year, 2002, 2012)


gen l_duration = ln(duration)


* Examiner identifier: Uexaminer_name when present, otherwise examiner_name.
gen exam = cond(Uexaminer_name == "", examiner_name, Uexaminer_name)


* Drop pre-2001 filings and rows with unknown experience.
drop if filing_or_371_c_year < 2001 | experience == .



* Attach the four per-application rejection files built earlier in this script.
foreach basis in obv novelty psm wd {
    sort file_name
    merge 1:1 file_name using temp/`basis'
    capture drop _merge
}



* Two-year experience bands: group g covers experience 2(g-1) and 2(g-1)+1
* for g = 1..7; group 8 covers 14 or more (non-missing).
forvalues g = 1/7 {
    local lo = 2 * (`g' - 1)
    local hi = `lo' + 1
    gen exp_group`g' = (experience == `lo' | experience == `hi')
}
gen exp_group8 = (experience >= 14 & experience != .)


* Share of each rejection basis among the bases recorded for the application.
* NOTE(review): obv, novelty, psm and wd were recoded to 0/1 when their temp
* files were built, so num_rejections counts distinct bases (0-4), not the
* number of rejections -- confirm that num_obv etc. were not intended here.
gen num_rejections = obv + novelty + psm + wd
foreach basis in obv novelty psm wd {
    gen ratio_`basis' = `basis' / num_rejections
}


* save temp analytical working file
save temp/working2, replace


* generate variables indicating years within each grade
* grade_year: for each examiner and grade, the ordinal (1, 2, ...) of the
* calendar year among the years in which that examiner appears at that grade.
* Reduce to the distinct (exam, grade, year) combinations first.
keep exam grade year
duplicates drop

bysort exam grade (year): gen grade_year = _n

sort exam grade year
save temp/grade_year, replace

* Reload the working file and attach grade_year.
use temp/working2, clear
sort exam grade year
merge m:1 exam grade year using temp/grade_year
capture drop _merge


save temp/working2, replace

*********************************************************************************************




***********************************************************************************************

* SET UP ANALYTICAL FILE BASED ON SAMPLE OF ISSUED PATENTS... AS OPPOSED TO APPLICATIONS

***********************************************************************************************


use temp/working2

* Keep rows that carry a patent number, then convert it to numeric
* (commas, blanks and capital letters are stripped).
drop if inlist(patent_number, "", "-")
destring patent_number, replace ignore(", A B C D E F G H I J K L M N O P Q R S T U V W X Y Z")


* Attach examiner-citation counts, total citation counts and litigation data.
foreach src in temp/excite.dta temp/total_cite litigation {
    sort patent_number
    merge 1:1 patent_number using `src'
    capture drop _merge
}

* lit: 1 when invalidity is recorded; invalid2: invalidity with missing set
* to 0.
gen lit = (invalidity != .)
gen invalid2 = cond(invalidity == ., 0, invalidity)


* Patents with citation rows but no examiner citations get a count of zero.
replace num_excites = 0 if num_excites == . & total_cites != .
gen excite_ratio = num_excites / total_cites

gen l_excite_ratio = ln(excite_ratio)

save temp/working3, replace


*****************************************************************************





*******************************************************************************

* RUN ALGORITHM TO SET UP CONSISTENT EXAMINER FIXED EFFECTS
* The matching code above matches examiner names in the applications data to
* the examiner rosters and accounts for variations in how a name appears over
* time, e.g., Michael Frakes, Michael D. Frakes, or Michael Denver Frakes.
* For the purpose of creating examiner fixed effects, however, it is
* preferable to have one consistent name formulation for each examiner.
* The following code constructs that.


*******************************************************************************


use temp/working2


* Preserve the pre-harmonisation identifier.
gen exam2 = exam

* Give every row sharing an examiner_name the same exam value: the first one
* in (examiner_name, exam) sort order. This equals the result of propagating
* exam row by row down the sorted data, but it covers every observation
* whatever the dataset size (no hard-coded row count) and runs in one pass.
* NOTE(review): if any row in a group has an empty exam, the empty string
* sorts first and is what gets propagated -- confirm that is acceptable.
bysort examiner_name (exam): replace exam = exam[1]


save temp/working2, replace





use temp/working3


* Preserve the pre-harmonisation identifier.
gen exam2 = exam

* Give every row sharing an examiner_name the same exam value: the first one
* in (examiner_name, exam) sort order. This equals the result of propagating
* exam row by row down the sorted data, but it covers every observation
* whatever the dataset size (no hard-coded row count), runs in one pass, and
* does not print one line of output per row.
* NOTE(review): if any row in a group has an empty exam, the empty string
* sorts first and is what gets propagated -- confirm that is acceptable.
bysort examiner_name (exam): replace exam = exam[1]


save temp/working3, replace


**********************************************************************************




* temp/file_name_exam: lookup from file_name to the examiner identifier.
use file_name exam using temp/working2, clear
sort file_name
save temp/file_name_exam, replace

clear







*********************************************************************************

* recreate grade levels with both GS13 groups based on first-office-action-timing matching



* temp/file_name_grade_foa_alt: per file_name, the FOA-timed grade with GS13
* split into 13.1 and 13.2.
use temp/GS_alt
sort file_name
merge 1:1 file_name using temp/file_name_exam
capture drop _merge

sort file_name
merge 1:1 file_name using temp/merged_1315_alt_final
capture drop _merge

* Position factor within (1.24, 1.26); a band is used rather than exact
* equality.
local pf125_foa (factor_foa > 1.24 & factor_foa < 1.26 & factor_foa != .)

gen grade_foa_alt = grade_foa
replace grade_foa_alt = 13.1 if grade_foa == 13
replace grade_foa_alt = 13.2 if `pf125_foa'

* A grade-12 row with that factor stays at grade 12.
replace grade_foa_alt = 12 if `pf125_foa' & grade_foa == 12

keep file_name grade_foa_alt
sort file_name

save temp/file_name_grade_foa_alt, replace



* create within-grade experience levels for file based on first-office-action timing matching
* temp/grade_year_foa: for each examiner and FOA-timed grade, the ordinal
* (1, 2, ...) of year_foa among the years observed at that grade.
use temp/GS_alt
sort file_name
merge 1:1 file_name using temp/file_name_exam
capture drop _merge

sort file_name 
merge 1:1 file_name using temp/merged_1315_alt_final
capture drop _merge

* Same grade definition as used for temp/file_name_grade_foa_alt, so the two
* files can later be joined on grade_foa_alt.
gen grade_foa_alt = grade_foa
replace grade_foa_alt = 13.1 if grade_foa == 13
replace grade_foa_alt = 13.2 if (factor_foa > 1.24 & factor_foa < 1.26 & factor_foa != .)

replace grade_foa_alt = 12 if (factor_foa > 1.24 & factor_foa < 1.26 & factor_foa != .) & grade_foa == 12


* One row per (exam, grade_foa_alt, year_foa).
gen x = 1
collapse (sum) x, by(exam grade_foa_alt year_foa)

* Number the years within examiner-grade in explicit year_foa order, rather
* than relying on whatever order the rows happen to be in within each group.
bysort exam grade_foa_alt (year_foa): gen grade_year_foa = _n

keep exam grade_foa_alt year_foa grade_year_foa

sort exam grade_foa_alt year_foa

save temp/grade_year_foa, replace


**************************************************************************************************













************************************************************************************
*
* more prep of application file data
*
**************************************************************************************



* merge in position factors
use temp/working2

* Attach, in order: position factors at disposition, position factors at
* first office action, and grade levels at first office action.
foreach src in merged_1315_final merged_1315_alt_final GS_alt {
    sort file_name
    merge 1:1 file_name using temp/`src'
    capture drop _merge
}

tab grade_foa, gen(GSdum_foa)


sort file_name
merge 1:1 file_name using temp/non_final_rejection
capture drop _merge

gen duration_sq = duration * duration

* 1 = LARGE, 0 = SMALL, missing otherwise.
gen size = cond(entity_size == "LARGE", 1, cond(entity_size == "SMALL", 0, .))


* fix grade level dummies.  there was one application with a grade level of 0 indicated...
* Shift the grade dummies down by one: discard GSdum1, park GSdum2-GSdum7
* under *_old names, then recreate GSdum1-GSdum6 from them.
capture drop GSdum1
forvalues k = 2/7 {
    rename GSdum`k' GSdum`k'_old
}

forvalues k = 1/6 {
    local src = `k' + 1
    gen GSdum`k' = GSdum`src'_old
}


* break out separate GS13 groups depending on position factors.  fix group for GS14 to account
* for disparities between position factor rosters and original rosters -- see note above
* Position factor within (1.24, 1.26), for disposition timing and for
* first-office-action timing.
local pf125     (factor > 1.24 & factor < 1.26 & factor != .)
local pf125_foa (factor_foa > 1.24 & factor_foa < 1.26 & factor_foa != .)

* Disposition timing:
*   GSdum5_1   - GSdum5 with grade-13 rows at that factor zeroed
*   GSdum5_2   - rows at that factor, excluding grade 12
*   GSdum6_alt - GSdum6 with rows at that factor zeroed
gen GSdum5_1 = cond(grade == 13 & `pf125', 0, GSdum5)
gen GSdum5_2 = (`pf125' & grade != 12)
gen GSdum6_alt = cond(`pf125', 0, GSdum6)


* Same three variables for grades matched at first-office-action timing.
gen GSdum_foa5_1 = cond(grade_foa == 13 & `pf125_foa', 0, GSdum_foa5)
gen GSdum_foa5_2 = (`pf125_foa' & grade_foa != 12)
gen GSdum_foa6_alt = cond(`pf125_foa', 0, GSdum_foa6)




* TOGGLE
* in main specifications, for the reasons indicated in the paper, we drop GS 15.  toggle this to 
* create the analytical files where they are retained, as we do in the appendix
drop if grade == 15



* grade_alt: grade with GS13 split into 13.1 and 13.2. Stored as double so
* the later equality tests against 13.1 and 13.2 compare values of the same
* precision.
gen double grade_alt = grade

replace grade_alt = 13.1 if grade == 13
* NOTE(review): these two lines test factor == 1.25 exactly, whereas the GSdum
* variables elsewhere in this script use the band 1.24 < factor < 1.26.
* Confirm factor is stored so that exact equality selects the same rows.
replace grade_alt = 13.2 if factor == 1.25
* A grade-12 row with that factor stays at grade 12.
replace grade_alt = 12 if factor == 1.25 & grade == 12


* TOGGLE
*replace grade_alt = 15 if grade == 15



* Snapshot reloaded after the grade_alt_year lookup is built.
save temp/working4_1, replace

* grade_alt_year: for each examiner and grade_alt, the ordinal (1, 2, ...) of
* the calendar year among the years observed at that grade_alt.
* Reduce to the distinct (exam, grade_alt, year) combinations first.
keep exam grade_alt year
duplicates drop

bysort exam grade_alt (year): gen grade_alt_year = _n

sort exam grade_alt year
save temp/grade_alt_year, replace

* Reload the snapshot and attach grade_alt_year.
use temp/working4_1, clear
sort exam grade_alt year
merge m:1 exam grade_alt year using temp/grade_alt_year
capture drop _merge



* Applications with no non-final rejection get all four rejection flags set
* to zero. novelty is spelled out in full: the earlier "nov" only resolved
* through variable-name abbreviation, which fails with varabbrev off or once
* another variable starting with "nov" exists.
foreach basis in obv psm wd novelty {
    replace `basis' = 0 if non_final_rejection == 0
}


* Attach the re-docketing flag built earlier in this script.
sort file_name
merge 1:1 file_name using temp/flag_redocket2
capture drop _merge

rename flag_redocket2 flag_redocket


* Indicators for the year of first office action.
tab year_foa, gen(foa_yeardum)


* Per examiner: highest experience level and latest year in the data.
bysort exam: egen maxexp = max(experience)

bysort exam: egen maxyear = max(year)


* Bands of the examiner's maximum experience: 0-3, 4-6, 7-9, 10-12, 13-15,
* and 16+. Each indicator starts at 0 where experience is known (missing
* otherwise) and is set to 1 when maxexp falls in the band.
local band_lo 0 4 7 10 13
local band_hi 3 6 9 12 15
forvalues b = 1/5 {
    local lo : word `b' of `band_lo'
    local hi : word `b' of `band_hi'
    gen max_exp_`b' = 0 if experience != .
    replace max_exp_`b' = 1 if maxexp >= `lo' & maxexp <= `hi'
}

* Top band. Missing compares as larger than any number in Stata, so without
* the explicit guard a missing maxexp would be classified as 16+.
gen max_exp_6 = 0 if experience != .
replace max_exp_6 = 1 if maxexp >= 16 & maxexp != .

tab first_year2, gen(first_year2dum)





* Attach the RCE flag and count built earlier in this script.
sort file_name
merge 1:1 file_name using temp/rce_flag
capture drop _merge


* Per-examiner extremes of grade and experience.
sort exam
by exam: egen maxgrade = max(grade)
by exam: egen mingrade = min(grade)
by exam: egen minexperience = min(experience)
by exam: egen maxexperience = max(experience)


* balance:  examiner observed from grade 7 through grade 14
* balance2: examiner reaches grade 14 and is observed at grade 11 or below
gen balance = (maxgrade == 14 & mingrade == 7)
gen balance2 = (maxgrade == 14 & mingrade <= 11 & mingrade != .)



* Convert class (created earlier as the first three characters of
* class_subclass) to numeric so it can be matched to class_match.
* NOTE(review): destring without force leaves the variable as a string if any
* value still contains non-numeric characters -- confirm none remain after the
* class exclusions applied earlier.
destring class, replace 

* Attach class_match (subcat is used below); class_match must be unique on
* class for m:1.
sort class
merge m:1 class using class_match
capture drop _merge

tab subcat, gen(subcatdum)



* Year-by-subcategory interactions cy_<year index>_<subcat index>.
* NOTE(review): the bounds 12 and 37 are hard-coded; the loop fails if fewer
* yeardum or subcatdum variables exist and ignores any beyond these counts.
foreach num2 of numlist 1/12 {
foreach num of numlist 1/37 {
gen cy_`num2'_`num' = yeardum`num2' * subcatdum`num'
}
}



* Convert patent_number to numeric: hyphens, commas, blanks and capital
* letters are stripped, and force turns anything still non-numeric into
* missing.
destring patent_number, replace ignore("- , A B C D E F G H I J K L M N O P Q R S T U V W X Y Z") force


* Attach patent-family information by patent number. This is old-style merge
* syntax (no 1:1 / m:1), which does not check key uniqueness; rows found only
* in family3 are discarded.
sort patent_number
merge patent_number using temp/family3
drop if _merge == 2
capture drop _merge

* family: 1 when a family3 row matched (grant_epo is non-missing).
gen family = 0
replace family = 1 if grant_epo != .


* Rebuild the application number from two substrings of
* earliest_publication_no (characters 4-7 and 10-15).
* NOTE(review): presumably this mirrors the app_no layout used in
* temp/family6 -- confirm against the publication-number format.
gen app_no2 = substr(earliest_publication_no, 10,6)
gen app_no1 = substr(earliest_publication_no, 4,4)
gen app_no = app_no1 + app_no2

* Attach the application-level foreign-priority flag; family6 must be unique
* on app_no for m:1. Rows found only in family6 are discarded.
sort app_no
merge m:1 app_no using temp/family6 
drop if _merge == 2
capture drop _merge

* foreign_p: foreign priority by either the patent-level or the
* application-level match.
gen foreign_p = 0
replace foreign_p = 1 if foreign_priority == 1 | foreign_priority2 == 1 



* Attach the filings data by year and class. Rows found only in filings are
* retained (no _merge filter here).
sort year class
merge m:1 year class using filings
capture drop _merge


* Attach litigation data by patent number (old-style merge syntax, which does
* not check key uniqueness); rows found only in litigation are discarded.
sort patent_number
merge patent_number using litigation
drop if _merge == 2
capture drop _merge

* asserted2: asserted, set to 0 where invalidity is missing.
* NOTE(review): the condition tests invalidity rather than asserted -- confirm
* this is intended and not carried over from the invalid2 definition.
gen asserted2 = asserted
replace asserted2 = 0 if invalidity == .

* Years from the disposition year to 2014 (hard-coded end year).
gen expose = 2014 - year



* Attach the productivity data by class (count is used below).
sort class
merge m:1 class using productivity
capture drop _merge

* Scaling factor by grade. Grade-based values are assigned first; the
* grade_alt-based values follow and take precedence where both apply.
gen scaling_factor = .
local sf_grades  12 5 7 9 11
local sf_values  1 .55 .7 .8 .9
forvalues i = 1/5 {
    local g : word `i' of `sf_grades'
    local v : word `i' of `sf_values'
    replace scaling_factor = `v' if grade == `g'
}
replace scaling_factor = 1.15 if grade_alt == 13.1
replace scaling_factor = 1.25 if grade_alt == 13.2
replace scaling_factor = 1.35 if grade_alt == 14 | grade == 15


* hours: class-level count divided by the grade's scaling factor.
gen hours = count / scaling_factor




* create groups indicating given experience years within given grade levels (for GS12+)
* Within-grade experience bands. Group g (g = 1..4) covers within-grade years
* 2g-1 and 2g; group 5 covers year 9 onward (non-missing).

* Grade 12, using grade_year.
forvalues g = 1/4 {
    local hi = 2 * `g'
    local lo = `hi' - 1
    gen exp_group`g'_12 = ((grade_year == `lo' | grade_year == `hi') & grade == 12)
}
gen exp_group5_12 = (grade_year >= 9 & grade_year != . & grade == 12)

* Rows classified as 13.2 never count toward the grade-12 groups.
forvalues g = 1/5 {
    replace exp_group`g'_12 = 0 if grade_alt == 13.2
}


* Grades 13.1, 13.2 and 14, using grade_alt_year.
* Each entry is "<variable suffix> <grade_alt value>".
foreach spec in "13_alt 13.1" "13_alt2 13.2" "14_alt 14" {
    gettoken tag lvl : spec
    forvalues g = 1/4 {
        local hi = 2 * `g'
        local lo = `hi' - 1
        gen exp_group`g'_`tag' = ((grade_alt_year == `lo' | grade_alt_year == `hi') & grade_alt == `lvl')
    }
    gen exp_group5_`tag' = (grade_alt_year >= 9 & grade_alt_year != . & grade_alt == `lvl')
}



* TOGGLE INCLUDING THIS
/*
gen exp_group1_15 = 0
replace exp_group1_15 = 1 if (grade_alt_year == 1 | grade_alt_year == 2) & grade_alt == 15
gen exp_group2_15 = 0
replace exp_group2_15 = 1 if (grade_alt_year == 3 | grade_alt_year == 4) & grade_alt == 15
gen exp_group3_15 = 0
replace exp_group3_15 = 1 if (grade_alt_year == 5 | grade_alt_year == 6) & grade_alt == 15
gen exp_group4_15 = 0
replace exp_group4_15 = 1 if (grade_alt_year == 7 | grade_alt_year == 8) & grade_alt == 15
gen exp_group5_15 = 0
replace exp_group5_15 = 1 if grade_alt_year >= 9 & grade_alt_year != . & grade_alt == 15
*/




* Recompute duration so that granted applications are measured to the issue
* date and all others to the status date.
replace duration = .
replace duration_sq = .

destring issue_date_of_patent_month, replace ignore("-")
destring issue_date_of_patent_day, replace ignore("-")


* Duration in days, as a difference of two Stata daily dates. The previous
* formula (year*365 + month*12 + day) weighted each month as 12 days and so
* was not a day count.
* NOTE: duration is first computed earlier in this file; keep both
* definitions consistent.
replace duration = mdy(status_month, status_day, status_year) - mdy(filing_or_371_c_month, filing_or_371_c_day, filing_or_371_c_year) if grant == 0
replace duration = mdy(issue_date_of_patent_month, issue_date_of_patent_day, issue_date_of_patent_year) - mdy(filing_or_371_c_month, filing_or_371_c_day, filing_or_371_c_year) if grant == 1

replace duration_sq = duration*duration




* Attach first-office-action (foa) grade information to the application-level data.

* drop observations with a missing application identifier before the 1:1 merge
keep if file_name != .

* one record per application in both files; no filter on _merge is applied,
* so unmatched records from either side are retained
sort file_name
merge 1:1 file_name using temp/file_name_grade_foa_alt
capture drop _merge


* many applications per examiner-grade-year cell; brings in grade_year_foa
* (again, no filter on _merge)
sort exam grade_foa_alt year_foa
merge m:1 exam grade_foa_alt year_foa using temp/grade_year_foa
capture drop _merge

* copy under the name used by the exp_group*_foa indicators below
gen grade_year_foa_alt = grade_year_foa

* Tenure-bin indicators based on the grade at first office action.
* For each grade level (12, 13_1, 13_2, 14) five 0/1 indicators are created:
* bins 1-2, 3-4, 5-6, 7-8 and 9+ of grade_year_foa_alt.  The 13_1 / 13_2 levels
* are identified with open intervals around 13.1 and 13.2 rather than equality.
local lvl_12   "grade_foa_alt == 12"
local lvl_13_1 "grade_foa_alt > 13.05 & grade_foa_alt < 13.15 & grade_foa_alt != ."
local lvl_13_2 "grade_foa_alt > 13.15 & grade_foa_alt < 13.25 & grade_foa_alt != ."
local lvl_14   "grade_foa_alt == 14"

foreach lvl in 12 13_1 13_2 14 {
    local g = 0
    foreach bin in "1, 2" "3, 4" "5, 6" "7, 8" {
        local ++g
        gen exp_group`g'_`lvl'_foa = inlist(grade_year_foa_alt, `bin') & `lvl_`lvl''
    }
    * open-ended top bin; system-missing grade_year_foa_alt is excluded explicitly
    gen exp_group5_`lvl'_foa = grade_year_foa_alt >= 9 & grade_year_foa_alt != . & `lvl_`lvl''
}



* This is the final analytical file for the application-level dataset



* Lower-case the grade dummies (disposition-based and foa-based) and write the
* final application-level analytical file.
foreach v in GSdum2 GSdum3 GSdum4 GSdum5_1 GSdum5_2 GSdum6_alt GSdum_foa2 GSdum_foa3 GSdum_foa4 GSdum_foa5_1 GSdum_foa5_2 GSdum_foa6_alt {
    local lc = lower("`v'")
    rename `v' `lc'
}

save analytical_application, replace



************************************************************************************

***********************************************************************************










*****************************************************************************************

* final prep of patented subsample

****************************************************************************************


clear

use temp/working3

* Shift the grade dummies down by one index: the existing GSdum2-GSdum7 are
* kept under an _old suffix, and new GSdum1-GSdum6 take the values of
* GSdum2_old-GSdum7_old respectively.
capture drop GSdum1
forvalues k = 2/7 {
    rename GSdum`k' GSdum`k'_old
}
forvalues k = 1/6 {
    local src = `k' + 1
    gen GSdum`k' = .
    replace GSdum`k' = GSdum`src'_old
}

* TOGGLE
drop if grade == 15

* size: 0 for SMALL entities, 1 for LARGE, missing for any other entity_size value
gen size = cond(entity_size == "LARGE", 1, cond(entity_size == "SMALL", 0, .))



* Merge patent-level auxiliary files onto the patented subsample.  In every
* merge below only master-only and matched records are kept (records that
* exist solely in the using file are discarded).

* litigation outcomes; any earlier copies of these variables are removed first
capture drop invalidity asserted
sort patent_number
merge 1:1 patent_number using litigation, keep(master match)
capture drop _merge

* foreign-family information
sort patent_number
merge 1:1 patent_number using temp/family3, keep(master match)
capture drop _merge

* family = 1 whenever grant_epo is non-missing
gen family = grant_epo != .

* granted at both the EPO and the JPO
gen grant_epo_jpo = grant_epo == 1 & grant_jpo == 1

* non-patent prior art counts
sort patent_number
merge 1:1 patent_number using nonpatentpriorart, keep(master match)
capture drop _merge

* file_name must be non-missing to serve as the key of the next 1:1 merge
drop if file_name == .

* position-factor match results
sort file_name
merge 1:1 file_name using temp/merged_1315_final, keep(master match)
capture drop _merge



* Condition identifying a position factor of (approximately) 1.25
local pf125 "factor > 1.24 & factor < 1.26 & factor != ."

* GSdum5_1: GSdum5 with the factor-1.25 grade-13 records switched off
gen GSdum5_1 = cond(grade == 13 & (`pf125'), 0, GSdum5)
* GSdum5_2: factor-1.25 records, except those recorded at grade 12
gen GSdum5_2 = (`pf125') & grade != 12
* GSdum6_alt: GSdum6 with all factor-1.25 records switched off
gen GSdum6_alt = cond(`pf125', 0, GSdum6)


* Citation measures.  The three sums below are reused across the measures.
local ex_total  "num_excites + nonpatent_by_examiner"
local all_total "total_cites + nonpatent_by_applicant + nonpatent_by_examiner"
local app_total "total_cites - num_excites + nonpatent_by_applicant"

gen excite_ratio_new = (`ex_total') / (`all_total')
* logs without a +1 offset: a zero count yields a missing value
gen total_excites = log(`ex_total')
gen l_num_excites = log(num_excites)
gen app_cites = log(`app_total' + 1)
gen app_cite_ratio = (`app_total') / (`all_total')








* asserted2 equals asserted, except that it is set to 0 where invalidity is missing
gen asserted2 = cond(invalidity == ., 0, asserted)

keep if grant != .

* class arrives as a string; the merge key in class_match is numeric
destring class, replace

sort class
merge m:1 class using class_match
capture drop _merge

* one dummy per subcategory / category value
tab subcat, gen(subcatdum)
tab cat, gen(catdum)

* year x subcategory interactions (12 year dummies, 37 subcategory dummies)
forvalues yr = 1/12 {
    forvalues sc = 1/37 {
        gen cy_`yr'_`sc' = yeardum`yr' * subcatdum`sc'
    }
}

* year x category interactions are currently switched off
forvalues yr = 1/12 {
    forvalues c = 1/6 {
        *gen cat_`yr'_`c' = yeardum`yr' * catdum`c'
    }
}



* grade_alt refines grade using the position factor:
*   factor == 1.25 and grade == 12  -> 12
*   factor == 1.25 (any other grade) -> 13.2
*   grade == 13 otherwise            -> 13.1
*   everything else                  -> grade
* Stored as double so that later equality tests against 13.1 / 13.2 hold.
gen double grade_alt = cond(factor == 1.25 & grade == 12, 12, cond(factor == 1.25, 13.2, cond(grade == 13, 13.1, grade)))

save temp/working5, replace



* Build grade_alt_year: for each examiner and grade_alt, the running count of
* distinct years observed at that grade (1 for the earliest year, 2 for the next, ...).

* reduce to one row per exam x grade_alt x year cell; xx is only a placeholder
gen xx = 1
collapse (sum) xx, by(exam grade_alt year)
drop xx

* number the years within each exam x grade_alt group in ascending order
bysort exam grade_alt (year): gen grade_alt_year = _n
save temp/grade_alt_year_2, replace

* bring the counter back onto the patent-level data
use temp/working5
sort exam grade_alt year
merge m:1 exam grade_alt year using temp/grade_alt_year_2
capture drop _merge


* Tenure-bin indicators for the patented subsample: five 0/1 indicators per
* grade level for bins 1-2, 3-4, 5-6, 7-8 and 9+.

* GS-12 uses grade_year / grade, and is forced to 0 for grade_alt == 13.2 records
local g = 0
foreach bin in "1, 2" "3, 4" "5, 6" "7, 8" {
    local ++g
    gen exp_group`g'_12 = inlist(grade_year, `bin') & grade == 12 & grade_alt != 13.2
}
gen exp_group5_12 = grade_year >= 9 & grade_year != . & grade == 12 & grade_alt != 13.2

* Remaining levels use grade_alt_year / grade_alt.
* Each spec is "<variable suffix> <grade_alt value>".
foreach spec in "13_alt 13.1" "13_alt2 13.2" "14_alt 14" {
    local sfx : word 1 of `spec'
    local lvl : word 2 of `spec'
    local g = 0
    foreach bin in "1, 2" "3, 4" "5, 6" "7, 8" {
        local ++g
        gen exp_group`g'_`sfx' = inlist(grade_alt_year, `bin') & grade_alt == `lvl'
    }
    * open-ended top bin; system-missing grade_alt_year is excluded explicitly
    gen exp_group5_`sfx' = grade_alt_year >= 9 & grade_alt_year != . & grade_alt == `lvl'
}




* Rebuild duration (filing date to disposition date) and create its square.
replace duration = .
gen duration_sq = .

* issue-date components are stored as strings with "-" placeholders; make them numeric
destring issue_date_of_patent_month, replace ignore("-")
destring issue_date_of_patent_day, replace ignore("-")


* Duration is the number of elapsed days between filing and disposition.
* The previous formula (year*365 + month*12 + day) weighted months by 12, so
* it was not monotone in calendar time (e.g. Jan 31 -> Feb 1 came out negative).
* mdy() returns a true daily date, so the difference is an exact day count.
* NOTE(review): mdy() yields missing for impossible dates (e.g. month or day of 0),
* so such records now get a missing duration instead of an arbitrary number, and
* duration values differ from those produced by earlier runs of this file.
* grant == 0: disposition date is the status date
replace duration = mdy(status_month, status_day, status_year) - mdy(filing_or_371_c_month, filing_or_371_c_day, filing_or_371_c_year) if grant == 0
* grant == 1: disposition date is the patent issue date
replace duration = mdy(issue_date_of_patent_month, issue_date_of_patent_day, issue_date_of_patent_year) - mdy(filing_or_371_c_month, filing_or_371_c_day, filing_or_371_c_year) if grant == 1

replace duration_sq = duration*duration



* Lower-case the grade dummies used in the analysis
foreach v in GSdum2 GSdum3 GSdum4 GSdum5_1 GSdum5_2 GSdum6_alt {
    local lc = lower("`v'")
    rename `v' `lc'
}


* save final analytical working file for patented dataset
save analytical_patented, replace


****************************************************************************************************************************************************





**************************************************************************************************************************

* set up data for the specification check in the appendix, in which we follow examiners in the years leading up to
* their GS-14 promotion regardless of their GS level before then


* Build an examiner-year panel with leads and lags of GS-14 and GS-13.2 status.
use temp/working4_1

gen grade14 = 0
replace grade14 = 1 if grade_alt == 14
gen grade13_2 = 0
replace grade13_2 = 1 if grade_alt == 13.2


* one row per examiner-year; each status variable becomes the share of that
* examiner-year's records at the given grade
collapse (mean) grade14 grade13_2, by(exam year)

sort exam year

* For each status variable create
*   lead1-lead4 : value 1-4 rows ahead within examiner
*   l1-l4       : value 1-4 rows back within examiner
* NOTE(review): leads/lags are taken by row position within examiner, so they
* correspond to calendar years only if an examiner's years have no gaps.
foreach g in grade14 grade13_2 {

    forvalues k = 1/4 {
        by exam: gen lead`k'`g' = `g'[_n+`k']
        * no row k steps ahead: carry the current status forward when it is exactly 1 or 0
        replace lead`k'`g' = 1 if `g' == 1 & lead`k'`g' == .
        replace lead`k'`g' = 0 if `g' == 0 & lead`k'`g' == .
    }

    forvalues k = 1/4 {
        by exam: gen l`k'`g' = `g'[_n-`k']
        * no row k steps back: if the examiner is not at this grade now, set the lag to 0.
        * (Previously this fill tested the already-filled lead variable for
        * missingness, so it never fired and the lags stayed missing.)
        replace l`k'`g' = 0 if `g' == 0 & l`k'`g' == .
    }
}


* within 1-2 rows ahead of (or already at) GS-14
gen leadgroup1_14 = 0
replace leadgroup1_14 = 1 if lead1grade14 == 1 | lead2grade14 == 1

* within 3-4 rows ahead of (or already at) GS-14
gen leadgroup2_14 = 0
replace leadgroup2_14 = 1 if lead3grade14 == 1 | lead4grade14 == 1


sort exam year

save temp/leads14, replace

*********************************************************************************************************************


















********************************************************************************************************************************

* Several of the source files, which were merged in above, contain results of a matching process between the examiner

* names in the PAIR records and the various examiner rosters providing information on grade levels.  The "fuzzy matching" programs

* used for this purpose are very time consuming--hence, our provision of the results of these efforts to save time in setting up

* the analytical dataset.  Nonetheless, we have also provided the code used to execute these matches below.

********************************************************************************************************************************




* MATCH EACH APPLICATION IN THE PAIR DATA WITH AN EXAMINER ROSTER CONTAINING INFORMATION ON GS-LEVEL OF EXAMINER FOR GIVEN YEAR

* IN MAIN APPROACH, THIS MATCH IS DONE AT THE TIME OF DISPOSITION OF APPLICATION.  START WITH PERFECT MATCHES, THEN USE RECLINK 

* TO EXECUTE FUZZY MATCHES.  NOTE THAT WE ACCEPT ALL SUGGESTED MATCHES FROM RECLINK.  OUT OF THE MILLION+ OBSERVATIONS, SOME ARE 

* LIKELY TO BE INCORRECT.  WE NOTE HOWEVER THAT THE RESULTS OF THE PAPER ARE ROBUST TO CONDITIONING ON PERFECT MATCHES OR TO 

* SETTING HIGH THRESHOLD SCORES USING THE RECLINK SCORE METRIC

********************************************************************************************************************************


/*
clear
insheet using examiners_gs.txt
sort examiner_name year
save temp/examiners_gs, replace




use applications

destring issue_date_of_patent_year, replace ignore("-")

gen year = status_year
replace year = issue_date_of_patent_year if patent_number != "-"

capture drop _merge

drop if examiner_name == ""
drop if examiner_name == "-"

sort examiner_name year
merge examiner_name year using temp/examiners_gs

keep if _merge == 3

capture drop _merge

keep examiner_name year file_name grade

* perfect merge file
save temp/merge_gs_0_new, replace




* now, work on fuzzy matches.  do separately by year for efficiency purposes
clear

foreach lname in 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 {

use applications

destring issue_date_of_patent_year, replace ignore("-")

gen year = status_year
replace year = issue_date_of_patent_year if patent_number != "-"

capture drop _merge

drop if examiner_name == ""
drop if examiner_name == "-"

sort examiner_name year
merge examiner_name year using temp/examiners_gs

* pull out only the failed merges
keep if _merge == 1
capture drop _merge


keep examiner_name year file_name

keep if year == `lname'

gen idmaster = _n

* fuzzy name matching
reclink examiner_name using temp/examiner_gs_`lname', idmaster(idmaster) idusing(idusing) gen(score)

save temp/merge_gs_`lname', replace


clear
}




use temp/merge_gs_2001
foreach num of numlist 2002/2012 {
append using temp/merge_gs_`num'
}
append using temp/merge_gs_0_new

keep file_name year grade

gen x = 1

collapse (mean) grade (sum) x, by(file_name)

keep if x == 1

capture drop x

sort file_name

save temp/GS, replace



***********************************************************************************************

***********************************************************************************************


*/




**********************************************************************************************

* DO THE SAME, BUT NOW DO SO BASED ON THE TIME OF FIRST OFFICE ACTION

**********************************************************************************************

/*

use applications

capture drop _merge
sort file_name
merge 1:1 file_name using temp/foa_date
capture drop _merge

gen year = foa_year_base
destring year, replace

drop if examiner_name == ""
drop if examiner_name == "-"

sort examiner_name year
merge examiner_name year using temp/examiners_gs

keep if _merge == 3
capture drop _merge


keep examiner_name year file_name grade

save temp/merge_gs_alt_0, replace

clear



foreach lname in 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 {

use applications

capture drop _merge
sort file_name
merge 1:1 file_name using temp/foa_date
capture drop _merge

gen year = foa_year_base
destring year, replace

drop if examiner_name == ""
drop if examiner_name == "-"

sort examiner_name year
merge examiner_name year using temp/examiners_gs

keep if _merge == 1
capture drop _merge


keep examiner_name year file_name

keep if year == `lname'

gen idmaster = _n

reclink examiner_name using temp/examiner_gs_`lname', idmaster(idmaster) idusing(idusing) gen(score)

save temp/merge_gs_alt_`lname', replace


clear
}



use temp/merge_gs_alt_2001
foreach num of numlist 2002/2012 {
append using temp/merge_gs_alt_`num'
}
append using temp/merge_gs_alt_0

keep file_name year grade

gen x = 1

collapse (mean) grade year (sum) x, by(file_name)

keep if x == 1

capture drop x

rename year year_foa
rename grade grade_foa

drop if grade_foa == 5 | grade_foa == 15

drop if grade_foa == 0
sort file_name

save temp/GS_alt, replace

*/





/*

*****************************************************************************

* SET UP POSITION FACTOR DATA FOR GS13

* The above code will merge application data with the GS level of the examiner
* It is also important to distinguish between 2 different types of GS-13 examiners, 
* one with partial signatory authority and one without, a distinction that bears
* on their examination time allocations.  We received a separate roster which contains
* information on the position factors necessary to distinguish these types of 
* GS-13 examiners.  Note that there will be some discrepancies across this roster and the 
* previous roster--e.g., with this latter roster, some examiners registering as being  
* GS-13 with partial signatory authority in a given year and, for the former roster, those
* same examiners registering as GS-14 in that year.  When this GS13/14 discrepancy existed,
* we chose to give dominance to these latter rosters.  Nonetheless, the results of this paper
* are robust to giving dominance to the former rosters.

********************************************************************************************



foreach lname in 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 {


clear
insheet using position_factor.txt

keep if year == `lname'

capture drop role

keep if (grade == 13 & factor > 1.149 & factor < 1.151 & factor != .) | (grade == 13 & factor > 1.249 & factor < 1.251 & factor != .) |(grade == 15 & factor > 1.349 & factor < 1.351 & factor != .) | (grade == 15 & factor > 1.399 & factor < 1.401 & factor != .)  | (grade == 15 & factor > 1.499 & factor < 1.501 & factor != .)

rename grade grade2
rename exam examiner_name

sort examiner_name
gen idusing = _n

save temp/position_factor_`lname', replace


use temp/working2
keep if year == `lname'
keep if grade == 12 | grade == 13 | grade == 14 | grade == 15

keep year grade examiner_name file_name

sort examiner_name
gen idmaster = _n 


reclink examiner_name using temp/position_factor_`lname', idmaster(idmaster) idusing(idusing) gen(score)

save temp/merged_1315_`lname', replace

}


use temp/merged_1315_2002

foreach lname in 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 {
append using temp/merged_1315_`lname'
}

bysort file_name: gen yy = _n 

keep if yy == 1

keep file_name grade2 factor
sort file_name

save temp/merged_1315_final, replace


*******************************************************************************************

*********************************************************************************************





********************************************************************************************

* Same thing--position factor matching--but do it based on time of first office action

********************************************************************************************


foreach lname in 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 {

clear
insheet using position_factor.txt

keep if year == `lname'

capture drop role

keep if (grade == 13 & factor > 1.149 & factor < 1.151 & factor != .) | (grade == 13 & factor > 1.249 & factor < 1.251 & factor != .) |(grade == 15 & factor > 1.349 & factor < 1.351 & factor != .) | (grade == 15 & factor > 1.399 & factor < 1.401 & factor != .)  | (grade == 15 & factor > 1.499 & factor < 1.501 & factor != .)

rename grade grade2
rename exam examiner_name

sort examiner_name
gen idusing = _n

save temp/position_factor_`lname', replace


use temp/GS_alt
keep if year_foa == `lname'
keep if grade_foa == 12 | grade_foa == 13 | grade_foa == 14 | grade_foa == 15

sort file_name
merge 1:1 file_name using temp/file_name_exam
keep if _merge == 3

rename exam examiner_name

keep year_foa grade_foa examiner_name file_name

sort examiner_name
gen idmaster = _n 


reclink examiner_name using temp/position_factor_`lname', idmaster(idmaster) idusing(idusing) gen(score)

save temp/merged_1315_alt_`lname', replace

}



use temp/merged_1315_alt_2002

foreach lname in 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 {
append using temp/merged_1315_alt_`lname'
}

bysort file_name: gen yy = _n 

keep if yy == 1

keep file_name factor
rename factor factor_foa
sort file_name

save temp/merged_1315_alt_final, replace

****************************************************************************************


*/




